{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "令系统的用户集合为 U， R(u) 是根据用户在训练集上的行为给用户作出的推荐列表，而 T(u) 是用户在测试集上的行为列表。那么推荐结果的准确率定义为：\n",
    "$$\n",
    "Precision=\\frac{\\sum_{u\\in U}|R(u)\\cap T(u)|}{\\sum_{u\\in U}|R(u)|}\n",
    "$$\n",
    "推荐结果的召回率定义为：\n",
    "$$\n",
    "Recall=\\frac{\\sum_{u\\in U}|R(u)\\cap T(u)|}{\\sum_{u\\in U}|T(u)|}\n",
    "$$\n",
    "\n",
    "推荐系统的覆盖率为：\n",
    "$$\n",
    "Coverage=\\frac{\\sum_{u\\in U}|R(u)|}{|I|}\n",
    "$$\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "def Precision(train, test, N):\n",
    "    hit = 0\n",
    "    all = 0\n",
    "    for user in train.keys():\n",
    "        tu = test[user]\n",
    "        rank = GetRecommendation(user, N)\n",
    "        for item, pui in rank:\n",
    "            if item in tu:\n",
    "                hit += 1\n",
    "        all += N\n",
    "    return hit / (all * 1.0)\n",
    "\n",
    "def Recall(train, test, N):\n",
    "    hit = 0\n",
    "    all = 0\n",
    "    for user in train.keys():\n",
    "        tu = test[user]\n",
    "        rank = GetRecommendation(user, N)\n",
    "        for item, pui in rank:\n",
    "            if item in tu:\n",
    "                hit += 1\n",
    "        all += len(tu)\n",
    "    return hit / (all * 1.0)\n",
    "\n",
    "def Coverage(train, test, N):\n",
    "    recommend_items = set()\n",
    "    all_items = set()\n",
    "    for user in train.keys():\n",
    "        for item in train[user].keys():\n",
    "            all_items.add(item)\n",
    "        rank = GetRecommendation(user, N)\n",
    "        for item, pui in rank:\n",
    "            recommend_items.add(item)\n",
    "    return len(recommend_items) / (len(all_items) * 1.0)\n",
    "\n",
    "def Popularity(train, test, N):\n",
    "    item_popularity = dict()\n",
    "    for user, items in train.items():\n",
    "        for item in items.keys():\n",
    "            if item not in item_popularity:\n",
    "                item_popularity[item] = 0\n",
    "                item_popularity[item] += 1\n",
    "    ret = 0\n",
    "    n=0\n",
    "    for user in train.keys():\n",
    "        rank = GetRecommendation(user, N)\n",
    "        for item, pui in rank:\n",
    "            ret += math.log(1 + item_popularity[item])\n",
    "            n += 1\n",
    "    ret /= n * 1.0\n",
    "    return ret"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import mean_squared_error\n",
    "from math import sqrt\n",
    "\n",
    "def rmse(prediction, ground_truth):\n",
    "    prediction = prediction[ground_truth.nonzero()].flatten() \n",
    "    ground_truth = ground_truth[ground_truth.nonzero()].flatten()\n",
    "    return sqrt(mean_squared_error(prediction, ground_truth))\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def evaluate(self):\n",
    "        ''' print evaluation result: precision, recall, coverage and popularity '''\n",
    "        print ('Evaluation start...', file=sys.stderr)\n",
    "\n",
    "        N = self.n_rec_movie\n",
    "        #  varables for precision and recall\n",
    "        hit = 0\n",
    "        rec_count = 0\n",
    "        test_count = 0\n",
    "        # varables for coverage\n",
    "        all_rec_movies = set()\n",
    "        # varables for popularity\n",
    "        popular_sum = 0\n",
    "\n",
    "        for i, user in enumerate(self.trainset):\n",
    "            if i % 500 == 0:\n",
    "                print ('recommended for %d users' % i, file=sys.stderr)\n",
    "            test_movies = self.testset.get(user, {})\n",
    "            rec_movies = self.recommend(user)\n",
    "            for movie, _ in rec_movies:\n",
    "                if movie in test_movies:\n",
    "                    hit += 1\n",
    "                all_rec_movies.add(movie)\n",
    "                popular_sum += math.log(1 + self.movie_popular[movie])\n",
    "            rec_count += N\n",
    "            test_count += len(test_movies)\n",
    "\n",
    "        precision = hit / (1.0 * rec_count)\n",
    "        recall = hit / (1.0 * test_count)\n",
    "        coverage = len(all_rec_movies) / (1.0 * self.movie_count)\n",
    "        popularity = popular_sum / (1.0 * rec_count)\n",
    "\n",
    "        print ('precision=%.4f\\trecall=%.4f\\tcoverage=%.4f\\tpopularity=%.4f' %\n",
    "               (precision, recall, coverage, popularity), file=sys.stderr)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
